/* 

Replication syntax CPS article
"Social change and women's left vote. The role of employment, education, and marriage in the gender vote gap"

Dr. Mathilde M. van Ditmars, University of Lucerne & University of Zurich
Dr. Rosalind Shorrocks, University of Manchester
May 2024 (Stata version 13/18)

File 3/4. BHPS/UKHLS Data preparation and analyses for Tables
*/

*British Household Panel Study (BHPS), years 1999-2009
*UK Household Longitudinal Study (UKHLS), years 2009-2019
*Data version used: Understanding Society Waves 1-10 and Harmonised BHPS: Waves 1-18, 13th Edition.
*see https://beta.ukdataservice.ac.uk/datacatalogue/doi/?id=6614#!#14

*DATASET WORKING FILE CREATION*

* Session setup: pin interpreter behaviour to Stata 13, switch off output
* paging and start from an empty session.
version 13
set more off
clear all

* Locations. Replace each "DEFINE" placeholder before running:
*   inpath - root folder holding the wave sub-folders that are read below
*   output - folder that receives the regression tables and graphs
*   cd     - working directory; the intermediate .dta files are saved here
global inpath "DEFINE"
global output "DEFINE"
cd "DEFINE"

** BHPS waves 1-18 (files ba_indresp ... br_indresp, year set to 1991 ... 2008)
* For each wave: load the analysis variables, tag wave and year, strip the
* wave prefix (ba_, bb_, ...) and save bhps_wave<w>.dta in the working
* directory. The input path uses forward slashes so that it resolves on
* Windows, macOS and Linux (the output paths further down already do).
* Wave-specific differences in the variables that are read:
*   - oprlg2:   waves 1, 3, 4, 5, 9, 11, 14, 16 and 18 only
*   - fenow_bh: every wave except wave 14
*   - weights:  wave 1 xrwght; wave 5 lewght and xewght; all other waves
*               lrwght and xrwght
* Weight variables keep their (prefix-stripped) source names.
local w = 0
foreach p in a b c d e f g h i j k l m n o p q r {
	local ++w
	local pre b`p'

	* variables read from every wave
	local keepvars
	foreach v in sex doby mastat isced school jbft_dv jbstat vote4 nchild_dv vote1 vote2 gor_dv {
		local keepvars `keepvars' `pre'_`v'
	}

	* variables read from some waves only
	if inlist(`w', 1, 3, 4, 5, 9, 11, 14, 16, 18) {
		local keepvars `keepvars' `pre'_oprlg2
	}
	if `w' != 14 {
		local keepvars `keepvars' `pre'_fenow_bh
	}

	* weights
	if `w' == 1 {
		local keepvars `keepvars' `pre'_xrwght
	}
	else if `w' == 5 {
		local keepvars `keepvars' `pre'_lewght `pre'_xewght
	}
	else {
		local keepvars `keepvars' `pre'_lrwght `pre'_xrwght
	}

	use pidp `keepvars' using "$inpath/bhps_w`w'/`pre'_indresp.dta", clear
	gen wave = `w'
	gen year = 1990 + `w'

	rename `pre'_* *

	sort pidp wave
	compress
	save bhps_wave`w'.dta, replace
}

** UKHLS waves 1-10 (files a_indresp ... j_indresp), stored as waves 19-28
* For each wave: load the analysis variables, set wave = 18 + UKHLS wave,
* copy the interview year (intdaty_dv) into year, strip the wave prefix,
* rename the cross-sectional weight to xrwght and save ukhls_wave<wave>.dta.
* The input path uses forward slashes so that it resolves on Windows, macOS
* and Linux (the output paths further down already do).
* Wave-specific differences in the variables that are read:
*   - vote4, vote1, vote2: every wave except UKHLS wave 8
*   - oprlg2:              UKHLS waves 1, 4 and 8 only
*   - emboost:             UKHLS wave 1 only
*   - nqfhigh_dv:          UKHLS wave 2 onwards
*   - cross-sectional weight: indinus_xw in waves 1-2, indinub_xw afterwards
*   - longitudinal weight:    none in waves 1-2; indinub_lw in waves 3-6;
*                             indinus_lw in wave 7; indinui_lw in waves 8-10
local w = 0
foreach p in a b c d e f g h i j {
	local ++w
	local wave = 18 + `w'

	* variables read from every wave
	local keepvars
	foreach v in sex birthy dvage mastat_dv qfhigh_dv school jbstat jbft_dv nchild_dv intdaty_dv fenow qfhigh qfvoc* gor_dv {
		local keepvars `keepvars' `p'_`v'
	}

	* variables read from some waves only
	if `w' != 8 {
		local keepvars `keepvars' `p'_vote4 `p'_vote1 `p'_vote2
	}
	if inlist(`w', 1, 4, 8) {
		local keepvars `keepvars' `p'_oprlg2
	}
	if `w' == 1 {
		local keepvars `keepvars' `p'_emboost
	}
	if `w' >= 2 {
		local keepvars `keepvars' `p'_nqfhigh_dv
	}

	* cross-sectional weight (renamed to xrwght below)
	if `w' <= 2 {
		local xw indinus_xw
	}
	else {
		local xw indinub_xw
	}
	local keepvars `keepvars' `p'_`xw'

	* longitudinal weight (kept under its source name)
	if inrange(`w', 3, 6) {
		local keepvars `keepvars' `p'_indinub_lw
	}
	else if `w' == 7 {
		local keepvars `keepvars' `p'_indinus_lw
	}
	else if `w' >= 8 {
		local keepvars `keepvars' `p'_indinui_lw
	}

	use pidp `keepvars' using "$inpath/ukhls_w`w'/`p'_indresp.dta", clear
	gen wave = `wave'
	gen year = `p'_intdaty_dv

	rename `p'_* *
	rename `xw' xrwght

	sort pidp wave
	compress
	save ukhls_wave`wave'.dta, replace
}

** stack all wave files into one long person-wave file
* Starts from BHPS wave 1, then appends BHPS waves 2-18 followed by the
* UKHLS files (stored as waves 19-28), in ascending wave order.
clear all
use bhps_wave1.dta
forvalues w = 2/18 {
	append using bhps_wave`w'.dta
}
forvalues w = 19/28 {
	append using ukhls_wave`w'.dta
}

** merge in age left school variables (school_dv, scend_dv, feend_dv)
* xwavedat is matched m:1 on pidp. keep(master match) stops persons who are
* only present in xwavedat from being added as extra rows with no wave;
* nogenerate replaces the former "drop _merge". The path uses forward
* slashes so that it resolves on every platform.
merge m:1 pidp using "$inpath/ukhls_wx/xwavedat.dta", ///
	keepusing(pidp school_dv scend_dv feend_dv) keep(master match) nogenerate

save bhps_ukhls.dta, replace

** drop EM Boost members and save the working file
* Persons with emboost==1 in wave 19 (the only wave file that carries
* emboost) are removed together with all of their person-years.
*
* The previous implementation reshaped to wide, dropped flagged persons,
* kept pidp and merged 1:m back onto bhps_ukhls.dta. Because merge keeps
* unmatched using observations by default, every dropped person (and every
* row without a wave) was re-added. Flagging within pidp and dropping
* directly avoids that, and avoids the wide reshape altogether.
* NOTE(review): if published estimates were produced with the previous code
* they may include these respondents - confirm before comparing numbers.

* rows without a wave carry no interview data
drop if missing(wave)

* Set the variables that order the panel
sort pidp wave
xtset pidp wave

bysort pidp (wave): egen byte emboost_member = max(emboost == 1 & wave == 19)
drop if emboost_member == 1
drop emboost_member

* replace: allow the do-file to be re-run without a "file already exists" error
save bhps_ukhls_workfile.dta, replace

*RECODING*

* declare the person-wave panel structure
sort pidp wave
xtset pidp wave

* Map negative survey codes onto extended missing values in every variable:
*   -1  -> .a  don't know
*   -2  -> .b  refusal
*   -7  -> .c  proxy
*   -8  -> .d  inapplicable
*   -9  -> .e  missing
*   -10 -> .f  (meaning not stated in this file)
recode _all (-1=.a) (-2=.b) (-7=.c) (-8=.d) (-9=.e) (-10=.f)

* age: for rows dated 2008 or earlier (the BHPS years in this file) compute
* age as year minus year of birth; later rows keep the dvage they came with.
* year<=2008 is never true for a missing year, so no separate check is needed.
replace dvage = year - doby if year <= 2008 & doby != .

* Analytic sample: aged 18-65, living in England, eligible to vote, 1999-2019.
*   gor_dv 10-13 are treated as outside England (rows with missing gor_dv stay in)
*   vote4==96 is treated as not eligible to vote
capture drop sample
gen sample = inrange(dvage, 18, 65) & !inlist(gor_dv, 10, 11, 12, 13) ///
	& vote4 != 96 & inrange(year, 1999, 2019)

keep if sample == 1

* gender: female = 1 when sex==2, 0 when sex==1, missing otherwise
gen female=1 if sex==2
replace female=0 if sex==1
* value labels must match the 0/1 coding (they were previously attached to
* 1 and 2, leaving 0 unlabelled and labelling a value that never occurs)
label define femlab 0 "male" 1 "female"
label values female femlab

tab female sex

* Marital status in three groups, built from mastat and mastat_dv.
* Widowed respondents are assigned no category and therefore stay missing;
* unmarried cohabiters are counted as single.
* Later assignments override earlier ones if a row matches more than one rule.
gen marital = .
replace marital = 0 if inlist(mastat, 2, 6) | inlist(mastat_dv, 1, 10)
replace marital = 1 if mastat == 1 | inlist(mastat_dv, 2, 3)
replace marital = 2 if inlist(mastat, 4, 5) | inlist(mastat_dv, 4, 5, 7, 8)

label define maritallab 0 "single" 1 "married/partnership" 2 "divorced/separated"
label values marital maritallab
tab marital mastat_dv

* Labour force participation:
*   1 when jbstat is 1 or 2, 0 when jbstat lies in 3-97, missing otherwise
gen work = cond(inlist(jbstat, 1, 2), 1, cond(inrange(jbstat, 3, 97), 0, .))

label define employlab 1 "employed" 0 "not employed"
label values work employlab

* Education in four levels (edu4), from isced plus qfhigh_dv/qfhigh.
*   1: isced 1-3; or scend_dv<=16 together with qfhigh==96; or isced==0 while
*      jbstat is not 7; or qfhigh_dv 13-16
*   2: isced 4 or qfhigh_dv 7-12
*   3: isced 5 or qfhigh_dv 3-5
*   4: isced 6-7 or qfhigh_dv 1, 2 or 6
* A fifth "other" category is deliberately not created, so those cases stay
* missing. Later assignments override earlier ones.
gen edu4 = .
replace edu4 = 1 if inlist(isced, 1, 2, 3) | (scend_dv <= 16 & qfhigh == 96) ///
	| (isced == 0 & jbstat != 7) | inlist(qfhigh_dv, 13, 14, 15, 16)
replace edu4 = 2 if isced == 4 | inlist(qfhigh_dv, 7, 8, 9, 10, 11, 12)
replace edu4 = 3 if isced == 5 | inlist(qfhigh_dv, 3, 4, 5)
replace edu4 = 4 if inlist(isced, 6, 7) | inlist(qfhigh_dv, 1, 2, 6)

label define edu4 1 "Less than upper secondary education" ///
	2 "Upper secondary education" ///
	3 "Post-secondary/tertiary vocational" ///
	4 "University"
label values edu4 edu4
tab edu4

* Religious attendance: 1 when oprlg2 is 1-2, 0 when oprlg2 is 3-5,
* missing otherwise
gen attend = cond(inrange(oprlg2, 1, 2), 1, cond(inrange(oprlg2, 3, 5), 0, .))

label define attendlab 0 "once a year or less" 1 "once a month or more"
label values attend attendlab

* attend_c: attendance carried forward within person - a missing value takes
* the most recent earlier value, so gaps of any length are filled as the
* replace moves down the wave-ordered rows
bysort pidp (wave): gen attend_c = attend
by pidp (wave): replace attend_c = attend_c[_n-1] if missing(attend_c)

* Left party support, based on vote4.
* The last line of each definition sets the outcome to missing when
* (vote1==2 and vote4 is .d) or when vote2==2.

** Labour only: 1 = vote4 is 2; 0 = vote4 is 1 or 3-97
gen left = cond(vote4 == 2, 1, cond(vote4 == 1 | inrange(vote4, 3, 97), 0, .))
replace left = . if (vote1 == 2 & vote4 == .d) | vote2 == 2

** Labour plus Greens: 1 = vote4 is 2 or 6; 0 = vote4 is 1, 3-5 or 7-97
gen left_gr = cond(inlist(vote4, 2, 6), 1, ///
	cond(vote4 == 1 | inrange(vote4, 3, 5) | inrange(vote4, 7, 97), 0, .))
replace left_gr = . if (vote1 == 2 & vote4 == .d) | vote2 == 2

* Own children in household: 0 when nchild_dv is 0, 1 when nchild_dv is 1-10,
* missing otherwise
gen kids = cond(nchild_dv == 0, 0, cond(inrange(nchild_dv, 1, 10), 1, .))

label define kidslab 0 "no children of own in household" 1 "children of own in household"
label values kids kidslab

* Birth cohorts. Year of birth is approximated as interview year minus age.
gen year_birth = year - dvage
tab year_birth

* Three groups: born 1934-1964, 1965-1980 and 1981-2001.
* Reference definitions: Baby boomers 1946-1964, Gen X 1965-1980,
* Millennials 1981-1996; the outer groups are widened to "and older" /
* "and younger". A year_birth outside 1934-2001 would keep its own value.
capture drop cohort
recode year_birth ///
	(1934/1964 = 3) ///
	(1965/1980 = 4) ///
	(1981/2001 = 5), gen(cohort)

label define cohort 3 "Baby boomers and older" 4 "Gen X" 5 "Millennials and younger"
label values cohort cohort
tab cohort

* Full-time indicator: 0 when jbft_dv==2 or jbstat is 3-97 (this takes
* precedence), otherwise 1 when jbft_dv==1, otherwise missing
gen ftwork = cond(jbft_dv == 2 | inrange(jbstat, 3, 97), 0, cond(jbft_dv == 1, 1, .))
label define ft 0 "part-time/not employed" 1 "full-time employed"
label values ftwork ft

* Part-time indicator: mirror image of ftwork
gen ptwork = cond(jbft_dv == 1 | inrange(jbstat, 3, 97), 0, cond(jbft_dv == 2, 1, .))
label define pt 0 "full-time/not employed" 1 "part-time employed"
label values ptwork pt

* Employment status in four groups; later assignments override earlier ones
gen work3 = .
replace work3 = 1 if jbft_dv == 1
replace work3 = 2 if jbft_dv == 2
replace work3 = 3 if inlist(jbstat, 3, 6)
replace work3 = 0 if inlist(jbstat, 4, 5, 7, 8, 9, 10, 11, 97)
label define work3 1 "full-time employed" 2 "part-time employed" 3 "unemployed/family care" 0 "other"
label values work3 work3

tab1 work work3 ftwork ptwork

*** Transitions

/*transitions:

1. EDUCATION
*a. obtaining a post-secondary vocational degree
*b. obtaining a university degree

2. EMPLOYMENT
* a. employed —> economic inactivity/housework
* b. full-time work -> part-time work
* c. part-time work —> full-time work
* d. economic inactivity/housework -> employed
* e. education -> employed 

4. CIVIL STATUS
*a. unmarried -> married/reg. partnership
*b. married/reg. partnership -> divorced/separated

==> these transitions will overlap for some people, e.g. students working part-time

*/


* Per-person observation counters (rows ordered by wave within pidp)
*   total    - number of rows the person contributes
*   num_wave - running number of the row within the person (1, 2, ...)
*   last_obs - 1 on the person's final row
*   nfirst / nlast - 1 on the person's first / last row
bysort pidp (wave): gen total = _N
by pidp: gen num_wave = _n
gen last_obs = (num_wave == total)

by pidp: gen nfirst = (_n == 1)
count if nfirst
by pidp: gen nlast = (_n == _N)
count if nlast

tab nfirst nlast

** Educational transitions (edu4 level 2 = upper secondary,
**   3 = post-secondary vocational, 4 = university)
* For each level k the loop creates
*   first_edu<k> - 1 on a row where edu4==k and the person's previous row had
*                  a lower, non-missing edu4
*   edu<k>_fw    - num_wave on such rows, missing elsewhere
*   wave_edu<k>  - person-level maximum of edu<k>_fw
*   edu<k>_trans - 1 on rows with edu4==k at or after wave_edu<k>, else 0
* NOTE(review): max() picks the latest onset if a person makes the same
* transition more than once - confirm this is intended.
local edulab2 "got upper sec degree"
local edulab3 "got vocat post-sec degree"
local edulab4 "got uni degree"

forvalues k = 2/4 {
	capture drop first_edu`k'
	bysort pidp (wave): gen first_edu`k' = (edu4 == `k' & edu4[_n-1] < `k')

	capture drop edu`k'_fw
	gen edu`k'_fw = num_wave if first_edu`k' == 1

	capture drop wave_edu`k'
	by pidp (wave): egen wave_edu`k' = max(edu`k'_fw)

	* a missing wave_edu<k> makes the comparison false, so persons never
	* observed making the transition are 0 throughout
	capture drop edu`k'_trans
	gen edu`k'_trans = (edu4 == `k' & num_wave >= wave_edu`k')

	label var edu`k'_trans "`edulab`k''"
	xttab edu`k'_trans
}



*2. LABOUR MARKET/WORK transitions
* a. work —> economic inactivity/housework
* b. full-time work -> part-time work
* c. part-time work —> full-time work
* d. economic inactivity/housework (unemployed + man/woman at home) -> work
* e. education -> work



* Work/employment transitions a-e, all built with the same recipe:
*   first_<stem> - 1 on the first row after the transition (onset condition)
*   <stem>_fw    - num_wave on such rows, missing elsewhere
*   wave_<stem>  - person-level maximum of <stem>_fw
*   wrk<k>_trans - 1 on rows in the destination state at or after wave_<stem>
* <stem> is wrk1 ... wrk4 for a-d and lb for e (education -> work).
* NOTE(review): max() picks the latest onset if a person makes the same
* transition more than once - confirm this is intended.

*a. work -> economic inactivity/housework (unemployed + man/woman at home)
local onset1 "work[_n-1]==1 & work3==3"
local state1 "work3==3"
local label1 "work to econ inactive"

*b. full-time work -> part-time work
local onset2 "work3[_n-1]==1 & work3==2"
local state2 "work3==2"
local label2 "FT to PT work"

*c. part-time work -> full-time work
local onset3 "work3[_n-1]==2 & work3==1"
local state3 "work3==1"
local label3 "PT to FT work"

*d. economic inactivity/housework -> work
*   (previous row NOT in education or training: jbstat not 7 or 9)
local onset4 "work3[_n-1]==3 & jbstat[_n-1]!=7 & jbstat[_n-1]!=9 & work==1"
local state4 "work==1"
local label4 "econ inactive to work"

*e. education -> work (previous row jbstat 7 or 9)
local onset5 "(jbstat[_n-1]==7 | jbstat[_n-1]==9) & work==1"
local state5 "work==1"
local label5 "edu to work"

forvalues k = 1/5 {
	local stem wrk`k'
	if `k' == 5 {
		local stem lb
	}

	capture drop first_`stem'
	bysort pidp (wave): gen first_`stem' = (`onset`k'')

	capture drop `stem'_fw
	gen `stem'_fw = num_wave if first_`stem' == 1

	capture drop wave_`stem'
	by pidp (wave): egen wave_`stem' = max(`stem'_fw)

	* a missing wave_<stem> makes the comparison false
	capture drop wrk`k'_trans
	gen wrk`k'_trans = ((`state`k'') & num_wave >= wave_`stem')

	label var wrk`k'_trans "`label`k''"
	xttab wrk`k'_trans
}




*4. CIVIL STATUS
* Transition k moves from marital==k-1 to marital==k:
*   civ1: single (0) -> married/registered partnership (1)
*   civ2: married/registered partnership (1) -> divorced/separated (2)
* Variables per transition:
*   first_civ<k> - 1 on the first row after the transition
*   civ<k>_fw    - num_wave on such rows, missing elsewhere
*   wave_civ<k>  - person-level maximum of civ<k>_fw
*   civ<k>_trans - 1 on rows with marital==k at or after wave_civ<k>
local civlab1 "unmarried to married/civil partnership"
local civlab2 "married/partnership to divorced/separated"

forvalues k = 1/2 {
	local from = `k' - 1

	capture drop first_civ`k'
	bysort pidp (wave): gen first_civ`k' = (marital[_n-1] == `from' & marital == `k')

	capture drop civ`k'_fw
	gen civ`k'_fw = num_wave if first_civ`k' == 1

	capture drop wave_civ`k'
	by pidp (wave): egen wave_civ`k' = max(civ`k'_fw)

	* a missing wave_civ<k> makes the comparison false
	capture drop civ`k'_trans
	gen civ`k'_trans = (marital == `k' & num_wave >= wave_civ`k')

	label var civ`k'_trans "`civlab`k''"
	xttab civ`k'_trans
}

*ANALYSIS*

* Output cosmetics: prefix value labels with their numeric codes and use the
* cleanplots graph scheme (presumably a user-installed scheme - it must be
* available on the machine running this file).
numlabel, add
set scheme cleanplots


** MODELS **

* Drop persons whose recorded gender is not constant over their rows:
* compare the person-level maximum and minimum of female.
by pidp (wave), sort: egen max_female = max(female)
by pidp (wave), sort: egen min_female = min(female)

gen gender_diff = (max_female != min_female)
drop if gender_diff == 1

*1. baseline model (Table S1)
* All models share the same core right-hand side: marital status, education
* and employment status, each interacted with female, plus attendance and
* children. FE models cluster standard errors on pidp; BE models are added
* for comparison. Every model is written to one spreadsheet.

set more off

local core "i.marital##female ib2.edu4##female ib3.work3##female i.attend_c i.kids"
local basefile "${output}/BHPS_UKHLS_baseline.xls"

* Labour + Greens, year dummies: FE then BE
xtreg left_gr `core' i.year, fe vce(cluster pidp)
outreg2 using "`basefile'", replace ctitle(left_gr FE) label

xtreg left_gr `core' i.year, be
outreg2 using "`basefile'", append ctitle(left_gr BE) label

* replicate without the Greens (Table S3): FE then BE
xtreg left `core' i.year, fe vce(cluster pidp)
outreg2 using "`basefile'", append ctitle(left FE) label

xtreg left `core' i.year, be
outreg2 using "`basefile'", append ctitle(left BE) label

* robustness check: age instead of year dummies (Table S5): FE then BE
xtreg left_gr `core' c.dvage, fe vce(cluster pidp)
outreg2 using "`basefile'", append ctitle(left_gr FE 2) label

xtreg left_gr `core' c.dvage, be
outreg2 using "`basefile'", append ctitle(left_gr BE 2) label

* robustness check: year dummies plus cohort, BE only (Table S5)
xtreg left_gr `core' i.cohort i.year, be
outreg2 using "`basefile'", append ctitle(cohort BE) label



*2. TRANSITION ANALYSIS
* The if qualifier is placed before the comma in every estimation command
* (documented syntax: command varlist if exp, options) rather than after the
* options.

set more off

* same analytic sample as baseline model: flag the estimation sample of the
* baseline FE model and restrict all transition models to it
xtreg left_gr i.marital##female ib2.edu4##female ib3.work3##female i.attend_c i.kids i.year, fe vce(cluster pidp)
capture drop sample_trans
gen sample_trans=1 if e(sample)

local trans "civ1_trans civ2_trans edu3_trans edu4_trans wrk5_trans wrk1_trans wrk2_trans wrk3_trans wrk4_trans"
local controls "i.attend_c i.kids"
local transfile "${output}/ENG_transitions.xls"

//Table S2: Labour + Greens, year dummies, women then men
xtreg left_gr `trans' `controls' i.year if female==1 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", replace ctitle(left_gr women) label

xtreg left_gr `trans' `controls' i.year if female==0 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", append ctitle(left_gr men) label

// Table S6: replicate without year dummies but controlling for age
xtreg left_gr `trans' `controls' c.dvage if female==1 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", append ctitle(left_gr women 2) label

xtreg left_gr `trans' `controls' c.dvage if female==0 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", append ctitle(left_gr men 2) label

// Table S4: replicate without the Greens
xtreg left `trans' `controls' i.year if female==1 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", append ctitle(left women) label

xtreg left `trans' `controls' i.year if female==0 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "`transfile'", append ctitle(left men) label

//Table 1
*frequencies of transitions in the analytic sample [how many people we
*observe undergoing each]: fit the pooled model, then tabulate every
*first_* indicator within its estimation sample
xtreg left_gr civ1_trans civ2_trans edu3_trans edu4_trans wrk1_trans wrk2_trans wrk3_trans wrk4_trans wrk5_trans `controls' i.year if sample_trans==1, fe vce(cluster pidp)

foreach v in first_civ1 first_civ2 first_edu2 first_edu3 first_edu4 first_wrk1 first_wrk2 first_wrk3 first_wrk4 first_lb {
	xttab `v' if e(sample)
}


***REPLICATE RESULTS WITHOUT RW POPULIST PARTIES IN REFERENCE CATEGORY****
* left_exprp equals left_gr except that rows with vote4==12 are set to
* missing instead of belonging to the reference (0) category.
* The if qualifier is placed before the comma in every estimation command
* (documented syntax: command varlist if exp, options).
gen left_exprp = 1 if vote4==2|vote4==6
replace left_exprp = 0 if vote4==1|vote4>=3&vote4<=5|vote4>=7&vote4<=97
replace left_exprp = . if (vote1==2&vote4==.d)|vote2==2|vote4==12

tab1 left_gr left_exprp,m

local core "i.marital##female ib2.edu4##female ib3.work3##female i.attend_c i.kids i.year"
local trans "civ1_trans civ2_trans edu3_trans edu4_trans wrk5_trans wrk1_trans wrk2_trans wrk3_trans wrk4_trans"

*1. BASELINE MODELS (Table S7)

xtreg left_exprp `core', fe vce(cluster pidp)
outreg2 using "${output}/ENG_appendix_PRP_1.xls", replace ctitle(right FE) label

*add BE effects for comparison
xtreg left_exprp `core', be
outreg2 using "${output}/ENG_appendix_PRP_1.xls", append ctitle(right BE) label


*2. TRANSITION ANALYSIS (Table S8)
* re-flag the estimation sample, now based on the left_exprp baseline FE model
xtreg left_exprp `core', fe vce(cluster pidp)
capture drop sample_trans
gen sample_trans=1 if e(sample)

xtreg left_exprp `trans' i.attend_c i.kids i.year if female==1 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "${output}/ENG_appendix_PRP_2.xls", replace ctitle(right women) label

xtreg left_exprp `trans' i.attend_c i.kids i.year if female==0 & sample_trans==1, fe vce(cluster pidp)
outreg2 using "${output}/ENG_appendix_PRP_2.xls", append ctitle(right men) label





*DESCRIPTIVES*
* The BE baseline model is fitted only to define e(sample) for what follows.
* The if qualifier is placed before the comma in every command below
* (documented syntax: command varlist if exp, options).
xtreg left_gr i.marital##female ib2.edu4##female ib3.work3##female i.attend_c i.kids i.year, be

ttest left_gr if e(sample), by(female)
// gender gap of -.032 (women more Lab/Green)

ttest left if e(sample), by(female)
// gender gap of about -.022 (the earlier note read "-.-22"; TODO confirm)


* Year-specific gender gap: RE model with female fully interacted with year,
* then the marginal effect of female at each year 1999-2019.
xtreg left_gr female##year if e(sample)&year!=2020, re
margins, dydx(female) at(year=(1999(1)2019)) post coeflegend
est store gendergap

* Plot the 21 marginal effects; the rename() list maps margins' _at indices
* 1-21 to the calendar years 1999-2019.
coefplot (gendergap, label(Difference between men and women))||, ///
vertical recast(scatter) ///
rename(1._at=1999 2._at=2000 3._at=2001 4._at=2002 5._at=2003 6._at=2004 7._at=2005 8._at=2006 9._at=2007 ///
10._at=2008 11._at=2009 12._at=2010 13._at=2011 14._at=2012 15._at=2013 16._at=2014 17._at=2015 18._at=2016 ///
19._at=2017 20._at=2018 21._at=2019) ///
xlabel(, angle(45)) xscale(lstyle(none)) ytitle("Probability of supporting LW party") yscale(lstyle(none)) ///
byopts(legend(position(6))) plotregion(lcolor(gs10) lwidth(small)) yline(0) ///
yscale (range (-.02 .1)) ylabel(-.02 (.02) .1) ///
title("England: gender gap in voting", margin(medium)) legend(cols(2) position(6))
graph save "${output}/ENG_gendergap_marginsdydx.gph", replace
graph export "${output}/ENG_gendergap_marginsdydx.jpg", replace
graph export "${output}/ENG_gendergap_marginsdydx.pdf", replace


